2-1. Dependency Measures
library(reshape2) # melt function
library(ggplot2) # ggplot function
library(pcaPP) # Fast Kendall function
library(energy) # Distance Correlation
library(Hmisc) # Hoeffding's D measure
library(zebu) # Normalized Mutual Information
# library(minerva) # Maximum Information Coefficient
library(XICOR) # Chatterjee's Coefficient
# library(dHSIC) # Hilbert Schmidt Independence Criterion
library(VineCopula) # Blomqvist's Beta
make_cormat <- function(dat_mat){
matrix_dat <- matrix(nrow = ncol(dat_mat), ncol = ncol(dat_mat))
cor_mat_list <- list()
basic_cor <- c("pearson", "spearman")
# find each of the correlation matrices with Pearson, Spearman, Kendall Correlation Coefficients
for (i in 1:2){
print(i)
cor_mat <- stats::cor(dat_mat, method = basic_cor[i])
cor_mat[upper.tri(cor_mat, diag = T)] <- NA
cor_mat_list[[i]] <- cor_mat
}
# functions that take matrix or data.frame as input
no_loop_function <- c(pcaPP::cor.fk, Hmisc::hoeffd,
VineCopula::BetaMatrix)
for (i in 3:5){
print(i)
fun <- no_loop_function[[i-2]]
cor_mat <- fun(dat_mat)
if (i == 4){ # Hoeffding's D
cor_mat <- cor_mat$D
}
cor_mat[upper.tri(cor_mat, diag = T)] <- NA
cor_mat_list[[i]] <- cor_mat
}
# functions that take two variables as input to calculate correlations.
need_loop <- c(zebu::lassie, energy::dcor2d, XICOR::calculateXI)
for (i in 6:8){
print(i)
fun <- need_loop[[i-5]]
cor_mat <- matrix(nrow = ncol(dat_mat),
ncol = ncol(dat_mat))
for (j in 2:ncol(dat_mat)){
for (k in 1:(j-1)){
if (i == 6){
cor_mat[j, k] <- fun(cbind(dat_mat[, j], dat_mat[, k]), continuous=c(1,2), breaks = 6, measure = "npmi")$global
} else {
cor_mat[j, k] <- fun(as.numeric(dat_mat[, j]),
as.numeric(dat_mat[, k]))
}
}
}
cor_mat[upper.tri(cor_mat, diag = T)] <- NA
cor_mat_list[[i]] <- cor_mat
}
return(cor_mat_list)
}
draw_heatmap <- function(cor_mat){
len <- 8
melted_cormat <- melt(cor_mat)
melted_cormat <- melted_cormat[!is.na(melted_cormat$value),]
break_vec <- round(as.numeric(quantile(melted_cormat$value,
probs = seq(0, 1, length.out = len),
na.rm = T)),
4)
break_vec[1] <- break_vec[1]-1
break_vec[len] <- break_vec[len]+1
melted_cormat$value <- cut(melted_cormat$value, breaks = break_vec)
heatmap_color <- unique(melted_cormat$value)
heatmap <- ggplot(data = melted_cormat, aes(x = Var2, y = Var1, fill = value))+
geom_tile(colour = "Black") +
ggplot2::scale_fill_manual(breaks = sort(heatmap_color),
values = rev(scales::viridis_pal(begin = 0, end = 1)
(length(heatmap_color)))) +
theme_bw() + # make the background white
theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.ticks = element_blank(),
# erase tick marks and labels
axis.text.x = element_blank(), axis.text.y = element_blank())
return (heatmap)
}
make_cor_heatmap <- function(dat_mat, cor_mat_list){
fun_lable <- c("Pearson's Correlation", "Spearman's Correlation", "Kendall's Correlation",
"Hoeffding's D", "Blomqvist's Beta", "NMI",
"Distance Correlation", "XI Correlation")
cor_heatmap_list <- list()
cor_abs_heatmap_list <- list()
# make correlation matrices
#cor_mat_list <- make_cormat(dat_mat)
for (i in 1:8){
cor_mat <- cor_mat_list[[i]]
# get heatmaps
cor_heatmap <- draw_heatmap(cor_mat)
# ggplot labels
ggplot_labs <- labs(title = paste("Heatmap of", fun_lable[i]),
x = "",
y = "",
fill = "Coefficient") # change the title and legend label
cor_heatmap_list[[i]] <- cor_heatmap + ggplot_labs
if (i %in% c(1,2,3,4,6)){
cor_abs_mat <- abs(cor_mat_list[[i]])
cor_abs_heatmap <- draw_heatmap(cor_abs_mat)
ggplot_abs_labs <- labs(title = paste("Abs Heatmap of", fun_lable[i]),
x = "", # change the title and legend label
y = "",
fill = "Coefficient")
cor_abs_heatmap_list[[i]] <- cor_abs_heatmap + ggplot_abs_labs
} else {
ggplot_abs_labs <- labs(title = paste("Abs Heatmap of", fun_lable[i]),
subtitle = "Equivalent to Non-Abs Heatmap",
x = "", # change the title and legend label
y = "",
fill = "Coefficient")
cor_abs_heatmap_list[[i]] <- cor_heatmap + ggplot_abs_labs
}
}
ans <- list(cor_heatmap_list, cor_abs_heatmap_list)
return (ans)
}
# cormat_list <- make_cormat(sub_dat)
# lst <- make_cor_heatmap(sub_dat, cormat_list)
load("seurat_corr.RData")
cor_pearson_mat <- cormat_list[[1]]; cor_spearman_mat <- cormat_list[[2]];
cor_kendall_mat <- cormat_list[[3]]; cor_hoeffd_mat <- cormat_list[[4]];
cor_blomqvist_mat <- cormat_list[[5]]; cor_dist_mat <- cormat_list[[6]];
cor_MI_mat <- cormat_list[[7]]; cor_XI_mat <- cormat_list[[8]];
1. Pearson’s correlation coefficient
cor_pearson_mat[1:5,1:5]
## IGKC MALAT1 HBB HBA2 HBA1
## IGKC NA NA NA NA NA
## MALAT1 0.032614908 NA NA NA NA
## HBB -0.004736592 0.029307231 NA NA NA
## HBA2 -0.002841992 -0.018170839 0.9418259 NA NA
## HBA1 -0.003328044 -0.003815931 0.9624631 0.9927025 NA
# plot the smallest correlations
cor_pearson_vec <- sort(abs(cor_pearson_mat), decreasing = T)
plot(cor_pearson_vec)

#plot the high correlations
par(mfrow = c(2,2))
for(i in 1:4){
idx <- which(abs(cor_pearson_mat) == cor_pearson_vec[i], arr.ind = T)
idx1 <- idx[1]; idx2 <- idx[2]
plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"),
main = paste0("Correlation of ", round(cor_pearson_mat[idx1, idx2], 3)))
}

#plot the lowest correlations
par(mfrow = c(2,2))
for(i in 1:4){
idx <- which(abs(cor_pearson_mat) == rev(cor_pearson_vec)[i], arr.ind = T)
idx1 <- idx[1]; idx2 <- idx[2]
plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"),
main = paste0("Correlation of ", round(cor_pearson_mat[idx1, idx2], 3)))
}

Heatmap
heatmap_list[[1]][[1]]

2. Spearman’s correlation coefficient
cor_spearman_mat[1:5,1:5]
## IGKC MALAT1 HBB HBA2 HBA1
## IGKC NA NA NA NA NA
## MALAT1 0.12899695 NA NA NA NA
## HBB 0.09838046 0.15514668 NA NA NA
## HBA2 0.06958243 0.04661870 0.2025643 NA NA
## HBA1 0.10954437 0.06171355 0.1585412 0.2075422 NA
# plot the smallest correlations
cor_spearman_vec <- sort(abs(cor_spearman_mat), decreasing = T)
plot(cor_spearman_vec)

#plot the high correlations
par(mfrow = c(2,2))
for(i in 1:4){
idx <- which(abs(cor_spearman_mat) == cor_spearman_vec[i], arr.ind = T)
idx1 <- idx[1]; idx2 <- idx[2]
plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"),
main = paste0("Correlation of ", round(cor_spearman_mat[idx1, idx2], 3)))
}

#plot the lowest correlations
par(mfrow = c(2,2))
for(i in 1:4){
idx <- which(abs(cor_spearman_mat) == rev(cor_spearman_vec)[i], arr.ind = T)
idx1 <- idx[1]; idx2 <- idx[2]
plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"),
main = paste0("Correlation of ", round(cor_spearman_mat[idx1, idx2], 3)))
}

Heatmap
heatmap_list[[1]][[2]]

3. Kendall’s Tau
cor_kendall_mat[1:5,1:5]
## IGKC MALAT1 HBB HBA2 HBA1
## IGKC NA NA NA NA NA
## MALAT1 0.09571851 NA NA NA NA
## HBB 0.08034182 0.11526436 NA NA NA
## HBA2 0.05959979 0.03634183 0.1781212 NA NA
## HBA1 0.09646614 0.04934756 0.1412646 0.194935 NA
# plot the smallest correlations
cor_kendall_vec <- sort(abs(cor_kendall_mat), decreasing = T)
plot(cor_kendall_vec)

#plot the high correlations
par(mfrow = c(2,2))
for(i in 1:4){
idx <- which(abs(cor_kendall_mat) == cor_kendall_vec[i], arr.ind = T)
idx1 <- idx[1]; idx2 <- idx[2]
plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"),
main = paste0("Correlation of ", round(cor_kendall_mat[idx1, idx2], 3)))
}

#plot the lowest correlations
par(mfrow = c(2,2))
for(i in 1:4){
idx <- which(abs(cor_kendall_mat) == rev(cor_kendall_vec)[i], arr.ind = T)
idx1 <- idx[1]; idx2 <- idx[2]
plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"),
main = paste0("Correlation of ", round(cor_kendall_mat[idx1, idx2], 3)))
}

Heatmap
heatmap_list[[1]][[3]]

4. Hoeffding’s D
cor_hoeffd_mat[1:5,1:5]
## IGKC MALAT1 HBB HBA2 HBA1
## IGKC NA NA NA NA NA
## MALAT1 0.0034314410 NA NA NA NA
## HBB 0.0017030871 0.0057650400 NA NA NA
## HBA2 0.0003065225 -0.0003003155 0.007154703 NA NA
## HBA1 0.0010485857 -0.0003210765 0.003211965 0.004557526 NA
# plot the smallest correlations
cor_hoeffd_vec <- sort(abs(cor_hoeffd_mat), decreasing = T)
plot(cor_hoeffd_vec)

#plot the high correlations
par(mfrow = c(2,2))
for(i in 1:4){
idx <- which(abs(cor_hoeffd_mat) == cor_hoeffd_vec[i], arr.ind = T)
idx1 <- idx[1]; idx2 <- idx[2]
plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"),
main = paste0("Correlation of ", round(cor_hoeffd_mat[idx1, idx2], 3)))
}

#plot the lowest correlations
par(mfrow = c(2,2))
for(i in 1:4){
idx <- which(abs(cor_hoeffd_mat) == rev(cor_hoeffd_vec)[i], arr.ind = T)
idx1 <- idx[1]; idx2 <- idx[2]
plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"),
main = paste0("Correlation of ", round(cor_hoeffd_mat[idx1, idx2], 3)))
}

Heatmap
heatmap_list[[1]][[4]]

5. Blomqvist’s Beta
cor_blomqvist_mat[1:5,1:5]
## [,1] [,2] [,3] [,4] [,5]
## [1,] NA NA NA NA NA
## [2,] 0.23564356 NA NA NA NA
## [3,] 0.13663366 0.449505 NA NA NA
## [4,] -0.03168317 -0.170297 0.04356436 NA NA
## [5,] 0.01584158 -0.360396 -0.08316832 0.2 NA
# plot the smallest correlations
cor_blomqvist_vec <- sort(abs(cor_blomqvist_mat), decreasing = T)
plot(cor_blomqvist_vec)

#plot the high correlations
par(mfrow = c(2,2))
for(i in 1:4){
idx <- which(abs(cor_blomqvist_mat) == cor_blomqvist_vec[i], arr.ind = T)
idx1 <- idx[i,1]; idx2 <- idx[i,2]
plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"),
main = paste0("Correlation of ", round(cor_blomqvist_mat[idx1, idx2], 3)))
}

#plot the lowest correlations
par(mfrow = c(2,2))
for(i in 1:4){
idx <- which(abs(cor_blomqvist_mat) == rev(cor_blomqvist_vec)[i], arr.ind = T)
idx1 <- idx[i,1]; idx2 <- idx[i,2]
plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"),
main = paste0("Correlation of ", round(cor_blomqvist_mat[idx1, idx2], 3)))
}

Heatmap
heatmap_list[[1]][[5]]

6. Distance Correlation
cor_dist_mat[1:5,1:5]
## [,1] [,2] [,3] [,4] [,5]
## [1,] NA NA NA NA NA
## [2,] 0.635205335 NA NA NA NA
## [3,] 0.010240862 0.2187141 NA NA NA
## [4,] 0.006441542 0.3089890 0.9956432 NA NA
## [5,] 0.007377620 0.2135901 0.9966008 0.9989115 NA
# plot the smallest correlations
cor_dist_vec <- sort(abs(cor_dist_mat), decreasing = T)
plot(cor_dist_vec)

#plot the high correlations
par(mfrow = c(2,2))
for(i in 1:4){
idx <- which(abs(cor_dist_mat) == cor_dist_vec[i], arr.ind = T)
idx1 <- idx[1]; idx2 <- idx[2]
plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"),
main = paste0("Correlation of ", round(cor_dist_mat[idx1, idx2], 3)))
}

#plot the lowest correlations
par(mfrow = c(2,2))
for(i in 1:4){
idx <- which(abs(cor_dist_mat) == rev(cor_dist_vec)[i], arr.ind = T)
idx1 <- idx[1]; idx2 <- idx[2]
plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"),
main = paste0("Correlation of ", round(cor_dist_mat[idx1, idx2], 3)))
}

Heatmap
heatmap_list[[1]][[6]]

7. Normalized Mutual Information
cor_MI_mat[1:5,1:5]
## [,1] [,2] [,3] [,4] [,5]
## [1,] NA NA NA NA NA
## [2,] 0.0049341243 NA NA NA NA
## [3,] 0.0011046394 0.007017202 NA NA NA
## [4,] 0.0007304058 0.002170969 0.7986327 NA NA
## [5,] 0.0009833885 0.002821310 0.8932718 0.9565026 NA
# plot the smallest correlations
cor_MI_vec <- sort(abs(cor_MI_mat), decreasing = T)
plot(cor_MI_vec)

#plot the high correlations
par(mfrow = c(2,2))
for(i in 1:4){
idx <- which(abs(cor_MI_mat) == cor_MI_vec[i], arr.ind = T)
idx1 <- idx[1]; idx2 <- idx[2]
plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"),
main = paste0("Correlation of ", round(cor_MI_mat[idx1, idx2], 3)))
}

#plot the lowest correlations
par(mfrow = c(2,2))
for(i in 1:4){
idx <- which(abs(cor_MI_mat) == rev(cor_MI_vec)[i], arr.ind = T)
idx1 <- idx[1]; idx2 <- idx[2]
plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"),
main = paste0("Correlation of ", round(cor_MI_mat[idx1, idx2], 3)))
}

Heatmap
heatmap_list[[1]][[7]]

8. Chatterjee’s XI Correlation
cor_XI_mat[1:5,1:5]
## [,1] [,2] [,3] [,4] [,5]
## [1,] NA NA NA NA NA
## [2,] 0.04743160 NA NA NA NA
## [3,] 0.03878761 0.071688601 NA NA NA
## [4,] -0.02999729 0.019550995 0.02824899 NA NA
## [5,] -0.02035673 0.003777234 0.02328462 0.02216266 NA
# plot the smallest correlations
cor_XI_vec <- sort(abs(cor_XI_mat), decreasing = T)
plot(cor_XI_vec)

#plot the high correlations
par(mfrow = c(2,2))
for(i in 1:4){
idx <- which(abs(cor_XI_mat) == cor_XI_vec[i], arr.ind = T)
idx1 <- idx[1]; idx2 <- idx[2]
plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"),
main = paste0("Correlation of ", round(cor_XI_mat[idx1, idx2], 3)))
}

#plot the lowest correlations
par(mfrow = c(2,2))
for(i in 1:4){
idx <- which(abs(cor_XI_mat) == rev(cor_XI_vec)[i], arr.ind = T)
idx1 <- idx[1]; idx2 <- idx[2]
plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"),
main = paste0("Correlation of ", round(cor_XI_mat[idx1, idx2], 3)))
}

Heatmap
heatmap_list[[1]][[8]]
